## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
The following plots are exploratory analysis and trial visualizations to include in the dashboard. The aesthetics and interactive capabilities are still in need of being improved. In addition, the margins of error
barchart for 2018
# Interactive bar chart of 2018 median household income per region.
# Orange error bars and points mark the estimate +/- its margin of error;
# the hover tooltip is assembled in the `text` aesthetic.
(
  acs_counties %>%
    filter(year == 2018) %>%
    ggplot(aes(
      x = NAME,
      y = median_household_income,
      text = paste0(
        "Region: ", NAME,
        "<br>Year: ", year,
        "<br>Median Household Income: $", median_household_income,
        "<br>Margin of Error: $", median_household_income_moe
      )
    )) +
    geom_col(fill = "dark blue") +
    geom_errorbar(
      aes(
        x = NAME,
        ymin = median_household_income - median_household_income_moe,
        ymax = median_household_income + median_household_income_moe
      ),
      color = "dark orange"
    ) +
    geom_point(color = "dark orange", size = 3) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45)) +
    labs(
      title = "Median Household Income",
      x = "Region",
      y = "Median Household Income"
    )
) %>%
  ggplotly(tooltip = "text")
Try a line chart with all years present
# Line chart of median household income over all available years.
# Every geography (NAME) is drawn as its own line in its own colour.
p <- acs_counties %>%
  ggplot(aes(
    x = year,
    y = median_household_income,
    group = NAME,
    color = NAME,
    text = paste0(
      "Region: ", NAME,
      "<br>Year: ", year,
      "<br>Median Household Income: $", median_household_income,
      "<br>Margin of Error: $", median_household_income_moe
    )
  )) +
  geom_line() +
  geom_point() +
  # Disabled experiments, kept for reference:
  # scale_colour_manual(values = dspgpal) +
  # geom_pointrange(aes(ymin = median_household_income - median_household_income_moe,
  #                     ymax = median_household_income + median_household_income_moe)) +
  theme_minimal() +
  labs(
    title = "Median Household Income 2015-2018",
    x = "Year",
    y = "Median Household Income"
  )

# Note: Wasco and south wasco are from ACS5 year estimates. Moving averages.
# Convert to plotly, show only the custom tooltip, and trim the mode bar.
p %>%
  ggplotly(tooltip = "text") %>%
  config(
    displayModeBar = "static",
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "zoom2d", "select2d", "lasso2d",
      "hoverClosestCartesian", "hoverCompareCartesian", "resetScale2d"
    )
  )
Easier to see trends, but lots of colors make it quite busy. Only drawback to the line graphs as opposed to the bar charts is that margin of error cannot be seen visually. But the tooltip provided by plotly is great for keeping that data.
Same line chart but colors are only kept for south wasco, wasco county, and the state geography.
# Same line chart as before, but only South Wasco, Wasco County and Oregon
# keep their own colour; every other geography is collapsed into a single
# "Neighboring Counties" colour level (lines are still grouped by NAME).
p <- acs_counties %>%
  mutate(south_wasco = fct_other(
    NAME,
    keep = c(
      "South Wasco County School District 1, Oregon",
      "Wasco County, Oregon",
      "Oregon"
    ),
    other_level = "Neighboring Counties"
  )) %>%
  ggplot(aes(
    x = year,
    y = median_household_income,
    group = NAME,
    color = south_wasco,
    text = paste0(
      "Region: ", NAME,
      "<br>Year: ", year,
      "<br>Median Household Income: $", median_household_income,
      "<br>Margin of Error: $", median_household_income_moe
    )
  )) +
  geom_line(size = 1.5) +
  geom_point(size = 2) +
  scale_colour_manual(name = "Region", values = dspgpal[c(1, 9, 2, 10)]) +
  # NOTE(review): no alpha aesthetic is mapped above, so this scale has
  # nothing to act on as written.
  scale_alpha_manual(values = c(1, 1, 1, 0.3)) +
  # Disabled experiment, kept for reference:
  # geom_pointrange(aes(ymin = median_household_income - median_household_income_moe,
  #                     ymax = median_household_income + median_household_income_moe)) +
  theme_minimal() +
  labs(
    title = "Median Household Income 2015-2018",
    x = "Year",
    y = "Median Household Income"
  )

# Note: Wasco and south wasco are from ACS5 year estimates. Moving averages.
p %>%
  ggplotly(tooltip = "text") %>%
  config(
    displayModeBar = "static",
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "zoom2d", "select2d", "lasso2d",
      "hoverClosestCartesian", "hoverCompareCartesian", "resetScale2d"
    )
  )
Much cleaner looking plot! It's obvious where South Wasco ranks in relation to the rest of the counties and geographies. We see that South Wasco has the lowest median household income, but it is very similar to that of Sherman County. Generally, the state of Oregon is seeing a steady increase in median household income; however, South Wasco's median household income has only started increasing from 2016 to now.
Rather than just looking at median income, we can see where the rest of the community is distributed in household income.
# Grouped bar chart of the 2018 household income distribution.
# Keep NAME plus the income columns, drop margin-of-error columns and the
# median, then reshape to long format (one row per region x income bracket).
income <- acs_counties %>%
  filter(year == 2018) %>%
  select(NAME, contains("income")) %>%
  select(!contains("moe"), -median_household_income)
income <- melt(
  income,
  id.vars = "NAME",
  measure.vars = colnames(income)[-1]
)

(
  income %>%
    ggplot() +
    geom_col(
      aes(x = NAME, y = value, fill = variable),
      position = "dodge"
    ) +
    scale_fill_discrete(
      name = "Income Bracket",
      labels = c(
        "Less than 10,000", "10,000-14,999", "15,000-24,999",
        "25,000-34,999", "35,000-49,999", "50,000-74,999",
        "75,000-99,999", "100,000-149,999", "150,000-199,999",
        "above 200,000"
      )
    ) +
    # NOTE(review): no colour aesthetic is mapped in this plot, so this
    # scale has nothing to act on as written.
    scale_colour_manual(name = "Income Bracket", values = dspgpal) +
    # Disabled: theme_minimal() + theme(axis.text.x = element_text(angle = 30)) +
    labs(
      title = "Income Distribution for 2018",
      x = "Region",
      y = "% of Population"
    ) +
    coord_flip()
) %>%
  ggplotly()
The plot is quite dense, but we can see how the distributions of income compare across the different counties and geographies. South Wasco has close to 50% of their population of households earning between 35,000 and 74,999 dollars. But they do have some of the highest percentages of households in the lowest income bracket along with skamania, klickitat and jefferson county.
Trying to visualize with a stacked bar chart
# Stacked bar chart of the 2018 income distribution: one horizontal bar per
# region, segmented by income bracket (stack order reversed).
(
  income %>%
    ggplot(aes(x = NAME, y = value, fill = variable)) +
    geom_col(position = position_stack(reverse = TRUE)) +
    scale_fill_discrete(
      name = "Income Bracket",
      labels = c(
        "Less than 10,000", "10,000-14,999", "15,000-24,999",
        "25,000-34,999", "35,000-49,999", "50,000-74,999",
        "75,000-99,999", "100,000-149,999", "150,000-199,999",
        "above 200,000"
      )
    ) +
    # Disabled: theme_minimal() + theme(axis.text.x = element_text(angle = 30)) +
    labs(
      title = "Income Distribution for 2018",
      x = "Region",
      y = "% of Population"
    ) +
    coord_flip()
) %>%
  ggplotly() %>%
  config(
    displayModeBar = "static",
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "zoom2d", "select2d", "lasso2d",
      "hoverClosestCartesian", "hoverCompareCartesian", "resetScale2d"
    )
  )
This stacked bar chart condenses the busy plot of distributions from the previous grouped bar. It is easier to see where the heavier densities are at the income extremes.
Bar chart for federal poverty rates in 2018
# Interactive bar chart of the 2018 share of population below the federal
# poverty line, with orange error bars/points for the margin of error.
(
  acs_counties %>%
    filter(year == 2018) %>%
    ggplot(aes(
      x = NAME,
      y = below_poverty,
      text = paste0(
        "Region: ", NAME,
        "<br>Year: ", year,
        "<br>Percent Below Federal Poverty: ", below_poverty, "%",
        "<br>Margin of Error: ", below_poverty_moe, "%"
      )
    )) +
    geom_col(fill = "dark blue") +
    geom_errorbar(
      aes(
        x = NAME,
        ymin = below_poverty - below_poverty_moe,
        ymax = below_poverty + below_poverty_moe
      ),
      color = "dark orange"
    ) +
    geom_point(color = "dark orange", size = 3) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 30)) +
    labs(
      title = "% of Population Below Federal Poverty Line",
      x = "Region",
      y = "% Below Poverty"
    )
) %>%
  ggplotly(tooltip = "text") %>%
  config(
    displayModeBar = "static",
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "zoom2d", "select2d", "lasso2d",
      "hoverClosestCartesian", "hoverCompareCartesian", "resetScale2d"
    )
  )
# Static choropleth of the 2018 percent of population below poverty by
# census tract, with county outlines drawn on top.
ggplot() +
  geom_sf(
    data = filter(acs_tracts, year == 2018),
    aes(fill = below_poverty)
  ) +
  # County outlines: collapse tracts by COUNTYFP. Restricted to 2018 so the
  # outline is built from the same rows as the fill layer instead of from
  # every year present in acs_tracts.
  geom_sf(
    data = acs_tracts %>%
      filter(year == 2018) %>%
      group_by(COUNTYFP) %>%
      summarise(),
    fill = "transparent", color = "gray20", size = 1
  ) +
  theme_minimal() +
  # Title typo fixed: census "tract", not "track".
  labs(title = paste("Percent of population below poverty by census tract in", 2018))
static bar chart
# Static bar chart of the 2018 employment measure for adults aged 20-64,
# with orange error bars and points for the margin of error.
acs_counties %>%
  filter(year == 2018) %>%
  ggplot(aes(x = NAME, y = employment_20_to_64)) +
  geom_col(fill = "dark blue") +
  geom_errorbar(
    aes(
      x = NAME,
      ymin = employment_20_to_64 - employment_20_to_64_moe,
      ymax = employment_20_to_64 + employment_20_to_64_moe
    ),
    color = "dark orange"
  ) +
  geom_point(color = "dark orange", size = 3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30)) +
  labs(title = "% of Adults (20-64) with Employment Status")
static map
# Static sf map: 2018 census tracts filled by the employment measure for
# adults aged 20 to 64.
ggplot() +
  geom_sf(
    data = filter(acs_tracts, year == 2018),
    aes(fill = employment_20_to_64)
  ) +
  # Title fixed: removed the duplicated word "adults" and corrected
  # "track" to "tract".
  labs(title = "Percent of employed adults 20 to 64 by census tract")
# Line chart of the affordable-housing percentage over all years. South
# Wasco, Wasco County and Oregon keep their own colour; every other
# geography shares the "Neighboring Counties" colour level.
p <- acs_counties %>%
  mutate(south_wasco = fct_other(
    NAME,
    keep = c(
      "South Wasco County School District 1, Oregon",
      "Wasco County, Oregon",
      "Oregon"
    ),
    other_level = "Neighboring Counties"
  )) %>%
  ggplot(aes(
    x = year,
    y = affordable_housing_all_perc,
    group = NAME,
    color = south_wasco,
    text = paste0(
      "Region: ", NAME,
      "<br>Year: ", year,
      "<br>Affordable Housing: ", round(affordable_housing_all_perc, digits = 1), "%"
    )
  )) +
  geom_line(size = 1.5) +
  geom_point(size = 2) +
  scale_colour_manual(name = "Region", values = dspgpal[c(1, 9, 2, 10)]) +
  # Disabled experiment, kept for reference:
  # geom_pointrange(aes(ymin = median_household_income - median_household_income_moe,
  #                     ymax = median_household_income + median_household_income_moe)) +
  theme_minimal() +
  labs(
    title = "Affordable Housing 2015-2018",
    x = "Year",
    y = "Affordable Housing"
  )

# Note: Wasco and south wasco are from ACS5 year estimates. Moving averages.
p %>%
  ggplotly(tooltip = "text") %>%
  config(
    displayModeBar = "static",
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "zoom2d", "select2d", "lasso2d",
      "hoverClosestCartesian", "hoverCompareCartesian", "resetScale2d"
    )
  )
The line graph looks very busy, but an adjustment to the line type and transparency should help. Generally, we can see that in 2018, South Wasco has one of the highest percentages of affordable housing amongst occupied housing units.
# 2018 affordable-housing columns per region.
housing <- select(filter(acs_counties, year == 2018), NAME, contains("affordable_housing"))

# Keep only the owner / renter percentages and reshape to long format.
housing_rent_own <- housing %>%
  select(NAME, affordable_housing_own_perc, affordable_housing_rent_perc)
# NOTE(review): the index drops column 1 (NAME) and column 4 if one exists
# (presumably a sticky geometry column; confirm against acs_counties).
housing_rent_own <- melt(
  housing_rent_own,
  id.vars = "NAME",
  measure.vars = colnames(housing_rent_own)[-c(1, 4)]
)

# Grouped bar chart for own and rent occupancy.
# Fix: the tooltip `text` is now inside aes(). It used to be passed as a
# stray argument to ggplot(), so no `text` aesthetic existed for
# `tooltip = "text"` to display. It also referenced `year`, which is not a
# column of the melted data; the data is filtered to 2018, so the literal
# is used instead.
# NOTE(review): the title says "2015-2018" although only 2018 rows are
# plotted; confirm the intended wording.
ggplotly(
  ggplot(
    housing_rent_own,
    aes(
      x = NAME, y = value, fill = variable,
      text = paste0(
        "Region: ", NAME,
        "<br>Year: ", 2018,
        "<br>Affordable Housing: ", round(value, digits = 1), "%"
      )
    )
  ) +
    geom_col(position = "dodge") +
    scale_fill_discrete(name = "Housing Ownership", labels = c("Own", "Rent")) +
    # Disabled: theme_minimal() + theme(axis.text.x = element_text(angle = 30)) +
    ylab("% of Occupied housing units") + xlab("Region") +
    coord_flip() +
    theme_minimal() +
    ggtitle(
      "Affordable Housing 2015-2018",
      subtitle = "Occupied households where monthly costs are less than 30% of houshold income"
    ),
  tooltip = "text"
)
Of the occupied housing units, South Wasco's community ranks highly in being able to afford their monthly housing costs given their household income.
# Divergent bar chart splitting own vs. rent occupancy: owner percentages
# stay positive and renter percentages are negated so the two series extend
# in opposite directions from zero.
housing_diverge <- housing_rent_own %>%
  mutate(
    value = as.numeric(
      ifelse(variable == "affordable_housing_own_perc", value, -value)
    )
  )

(
  housing_diverge %>%
    ggplot(aes(
      x = NAME,
      y = value,
      fill = variable,
      text = paste0(
        "Region: ", NAME,
        "<br>Year: ", 2018,
        "<br>Affordable Housing: ", round(abs(value), digits = 1), "%"
      )
    )) +
    geom_col() +
    # Axis labels show absolute values so the negated side reads as positive.
    scale_y_continuous(
      breaks = pretty(housing_diverge$value),
      labels = abs(pretty(housing_diverge$value))
    ) +
    # NOTE(review): no colour aesthetic is mapped in this plot, so this
    # scale has nothing to act on as written.
    scale_colour_manual(name = "Housing Ownership", values = dspgpal[c(1, 9)]) +
    scale_fill_discrete(name = "Housing Ownership", labels = c("Own", "Rent")) +
    theme_minimal() +
    labs(x = "Region", y = "% of Occupied Housing Units") +
    coord_flip()
) %>%
  ggplotly(tooltip = "text") %>%
  # The title and subtitle are set on the plotly side, using <sup> markup.
  layout(
    title = list(
      text = paste0(
        "Affordable Housing 2015-2018",
        "<br>", "<sup>",
        "% of occupied households where monthly costs are less than 30% of houshold income",
        "</sup>"
      )
    )
  )
An alternative way to visualize the percentage of occupied housing units whose monthly housing costs are less than 30% of household income
% of affordable housing by household income bracket
# 2018 affordable-housing columns per region.
housing <- select(filter(acs_counties, year == 2018), NAME, contains("affordable_housing"))

# Keep NAME plus the columns that are neither percentages ("perc") nor
# totals ("total"), then reshape to long format.
housing_by_income <- housing %>%
  select(NAME, !contains("perc") & !contains("total"))
# NOTE(review): the index drops column 1 (NAME) and column 4. Confirm that
# dropping the 4th column is intended here and not a copy-paste carry-over.
housing_by_income <- melt(
  housing_by_income,
  id.vars = "NAME",
  measure.vars = colnames(housing_by_income)[-c(1, 4)]
)

# Grouped bar chart of affordable housing by household income bracket.
# Fix: the tooltip `text` is now inside aes(). It used to be passed as a
# stray argument to ggplot(), so no `text` aesthetic existed for
# `tooltip = "text"` to display. It also referenced `year`, which is not a
# column of the melted data; the data is filtered to 2018, so the literal
# is used instead.
# NOTE(review): the title says "2015-2018" although only 2018 rows are
# plotted; confirm the intended wording.
ggplotly(
  ggplot(
    housing_by_income,
    aes(
      x = NAME, y = value, fill = variable,
      text = paste0(
        "Region: ", NAME,
        "<br>Year: ", 2018,
        "<br>Affordable Housing: ", round(value, digits = 1), "%"
      )
    )
  ) +
    geom_col(position = "dodge") +
    # Disabled: scale_fill_discrete(name = "Housing Ownership", labels = c("Own", "Rent")) +
    # Disabled: theme_minimal() + theme(axis.text.x = element_text(angle = 30)) +
    ylab("% of Occupied housing units") + xlab("Region") +
    coord_flip() +
    ggtitle(
      "Affordable Housing 2015-2018",
      subtitle = "Occupied households where monthly costs are less than 30% of houshold income"
    ),
  tooltip = "text"
)
Social
Racial Diversity
This plot is not yet interactive, and needs some work on the ordering of the grouped bars. But generally, South Wasco and its surrounding neighbors have a majority white population. The second most populous racial group is the Hispanic or Latino group.
Family Stability
Compared to its immediate neighbors, South Wasco has the highest percentage of children who are in a household with no family members. South Wasco also has the highest percentage of people living alone or with non family members.
Educational Attainment
Generally, South Wasco has a large percentage of adults that are high school graduates and have some higher education. However, it has some of the lowest percentages of adults that have a bachelor's degree or higher. Hood River has the most highly educated population.